***Clean and use IPUMS Data to Test Migration Patterns***

/*Uses generated adjQOL measures from Albouy (2016) to test the likelihood of moving for entrepreneurs and
non-entrepreneurs, the distance moved, and changes in rates of entrepreneurship.

Requires following files:
"${Data}/IPUMS_migration_1990_cbsa.dta" - Cleaned IPUMS File
"${Data}/IPUMS_migration_2000_cbsa.dta" - Cleaned IPUMS File
"${Data}/BP_Decomposition_cbsa_year.dta" BP_Decomposition_Data - Geographic Variables used in Table 7
"${Data}/distances.dta" - Distances between CBSAs from John Gardner Website
"${Data}/BP_CBSA.dta" - List of Best Places from Internet Appendix Table B.1
"${Data}/New_QOL_CBSA_1980_2000.dta" - Calculations based on Albouy 2016 - Available upon request
"${Data}/seegert_collapsed.dta" - Data from Nathan Seegert
"${Data}/CBSA_Geographic_Data.dta" - Cleaned and Aggregated Geographic Data
"${Data}/scaled_transactions_cbsa.dta" - Number of transactions per CBSA from Pratt Stats and Reference USA
*/

********************************************************************************
**************** Entrepreneurs per Laborer in CBSAs   **************************
********************************************************************************
* Builds, from the 1990 file, one row per cbsa holding the hhwt-weighted number
* of entrepreneurs, the hhwt-weighted number of persons, their ratio, and the
* natural log of that ratio. The result is saved for later merging.
use "${Data}/IPUMS_migration_1990_cbsa.dta", clear

* Discard the incoming flag and rebuild it: 1 when classwkrd_head lies in
* [10,19], 0 otherwise (inrange() also returns 0 when classwkrd_head is missing)
drop entrepreneur
generate entrepreneur = inrange(classwkrd_head, 10, 19)

* Constant of 1 per row, so its weighted sum is the weighted head-count per cbsa
generate dummy = 1

collapse (sum) entrepreneur dummy [pweight=hhwt], by(cbsa)

* Share of entrepreneurs and its log (the log is missing where the share is 0)
generate entreprenur_to_total_1990   = entrepreneur/dummy
generate l_entreprenur_to_total_1990 = ln(entrepreneur/dummy)

* Give the raw totals year-specific names before saving
rename entrepreneur entrepreneur_total_1990
rename dummy        people_total_1990

save "${Data}/IMPUS_entrepreneur_counts_cbsa_1990.dta", replace

********************************************************************************
*******************  BP Decomposition Indices   ********************************
********************************************************************************
* Reshapes the cbsa-by-year decomposition file into one row per cbsa that
* carries, for every predictor, its 2000 value and its 1995 value.
use "${Data}/BP_Decomposition_cbsa_year.dta", clear

*Predictors used in Table D.10
local bp_predictors l_pop_1900 l_Precipitation l_Greater_90 l_Less_20 l_Sunlight l_Humidity l_Water ///
	Topography_Code Percent_UnHealthy_Air Student_to_Teacher property_crime_rate violent_crime_rate ///
	recreation_estab_pc eating_estab_pc drinking_estab_pc

* Year-specific copies: each copy is non-missing only on rows of its own year
foreach pred of varlist `bp_predictors' {
	foreach yr in 2000 1995 {
		gen `pred'_`yr' = `pred' if year==`yr'
	}
}

* Build the collapse list as one wildcard per predictor stub, so every variable
* whose name begins with the stub (including the copies made above) is averaged
local to_average
foreach pred of local bp_predictors {
	local to_average `to_average' `pred'*
}

* Means ignore missing values, so each year-specific column keeps that year's value
collapse (mean) `to_average', by(cbsa)

save "${Data}/BP_Decomposition_cbsa_1995_2000.dta", replace

********************************************************************************
**************** Merge in Geographic Data   ************************************
********************************************************************************
* Starts from the 2000 person-level file and attaches CBSA-level information
* twice where relevant: once keyed on cbsa and once keyed on mig5cbsa. For the
* mig5cbsa merges the incoming variables are prefixed with mig5 (uname option).
* After every merge, rows that exist only in the using file are dropped.

***Merge in cbsa_BPs***
use "${Data}/IPUMS_migration_2000_cbsa.dta", clear

* Best-place indicator for cbsa
mmerge cbsa using "${Data}/BP_CBSA.dta"
drop if _merge==2
gen missing_CBSA = (BP_CBSA==.)
replace BP_CBSA=0 if BP_CBSA==. /*For rural areas*/

* Best-place indicator for mig5cbsa
mmerge mig5cbsa using "${Data}/BP_CBSA.dta", uname(mig5) umatch(cbsa)
drop if _merge==2
* NOTE(review): this flag tests mig5cbsa itself, whereas missing_CBSA above tests
* the merged BP_CBSA value - confirm that the asymmetry is intended
gen missing_mig5cbsaCBSA = (mig5cbsa==. & moved==1)
replace mig5BP_CBSA=0 if mig5BP_CBSA==. & moved==1 /*For rural areas*/


***Merge in other measures of BP (so the q_1990 and production)
mmerge cbsa using "${Data}/New_QOL_CBSA_1980_2000.dta", ukeep(q_1990 q_1980 q_2000 ax_*) uif(cbsa!=.)
drop if _merge==2
mmerge mig5cbsa using "${Data}/New_QOL_CBSA_1980_2000.dta", ukeep(q_1990 q_1980 q_2000 ax_*) uif(cbsa!=.) uname(mig5) umatch(cbsa)
drop if _merge==2

mmerge cbsa using "${Data}/seegert_collapsed.dta", ukeep(production_amenity) uif(cbsa!=.)
drop if _merge==2

***Merge in decomposition
mmerge cbsa using "${Data}/BP_Decomposition_cbsa_1995_2000.dta", uif(cbsa!=.)
drop if _merge==2
mmerge mig5cbsa using "${Data}/BP_Decomposition_cbsa_1995_2000.dta", uif(cbsa!=.) uname(mig5) umatch(cbsa)
drop if _merge==2

*Changes in QOL
* Value for cbsa minus value for mig5cbsa, set to 0 when moved==0
foreach yr in 1990 2000 {
	gen QOL_diff_`yr' = q_`yr' - mig5q_`yr'
	replace QOL_diff_`yr' = 0 if moved==0
}

* Same construction for the ax_true measures
foreach yr in 1990 2000 {
	gen ax_diff_`yr' = ax_true_`yr' - mig5ax_true_`yr'
	replace ax_diff_`yr' = 0 if moved==0
}

gen QOL_diff_2000_1990 = q_2000 - q_1990 /*This is within CBSA*/
label variable QOL_diff_2000_1990 "Change in CBSA QOL"

***Merge in other geographic data***
mmerge cbsa year using "${Data}/CBSA_Geographic_Data.dta", uif(year==2000)
drop if _merge==2

mmerge cbsa year using "${Data}/scaled_transactions_cbsa.dta"
drop if _merge==2

********************************************************************************
********************* Setup for Regressions ************************************
********************************************************************************
* Restricts the sample, builds the mover indicators, the distance moved and the
* standardized QOL-to-productivity ratios, defines the control and fixed-effect
* globals used by the regressions, and saves the estimation dataset.

* Keep only rows with related==101 (presumably household heads - verify against the IPUMS codebook)
keep if related==101

* Mover indicator: 1 only when moved==1 and mig5cbsa differs from cbsa
gen moved_outside_cbsa = (moved==1 & mig5cbsa!=cbsa)
label variable moved_outside_cbsa "Moved from another CBSA"
gen moved_outside_cbsa_entrep = moved_outside_cbsa*entrepreneur

*merge in distance moves
mmerge cbsa mig5cbsa using "${Data}/distances.dta", umatch(cbsa1 cbsa2) ukeep(cbsa1 cbsa2 dists)
drop if _merge==2
* Distance is 0 for anyone whose CBSA did not change or who did not move
replace dists=0 if mig5cbsa==cbsa
replace dists=0 if moved==0

***Generate a QOL to production measure***
/*Note: need to rescale to take into account negatives*/
* Each measure is shifted by 1 plus the absolute value of its own minimum.
* NOTE(review): the cbsa and mig5 versions are shifted by their own separate
* minima - confirm this is intended, since the two ratios are differenced below
foreach measure in q_2000 ax_true_2000 {
	summarize `measure'
	gen `measure'_scaled = 1 + `measure' + abs(r(min))
	summarize mig5`measure'
	gen `measure'_scaled_mig = 1 + mig5`measure' + abs(r(min))
}

* Ratio for cbsa: winsorized with p(.01), then standardized
gen QOL_Production_2000 = q_2000_scaled / ax_true_2000_scaled
winsor QOL_Production_2000, gen(QOL_Production_2000w) p(.01)
label variable QOL_Production_2000w "QOL / Productivity (2000)"
egen QOL_Production_2000wstd = std(QOL_Production_2000w)
label variable QOL_Production_2000wstd "QOL / Productivity (2000)"

* Ratio for mig5cbsa: same treatment
gen QOL_Production_2000_mig = q_2000_scaled_mig / ax_true_2000_scaled_mig
winsor QOL_Production_2000_mig, gen(QOL_Production_2000_migw) p(.01)
label variable QOL_Production_2000_migw "QOL / Productivity (2000)"
egen QOL_Production_2000_migwstd = std(QOL_Production_2000_migw)
label variable QOL_Production_2000_migwstd "QOL / Productivity (2000)"

*FEs
* Globals (not locals) so they remain available to the regression section
global FEs "age_id female mar_id sch_id children min_group immig"
global local_controls cbsa_bachelors_or_higher l_pop_cbsa l_pop_density l_cbsa_tax l_median_house l_pcpi  ///
 d5_pop_cbsa d5_pcpi_cbsa d5_employment d5_med_house_val_cbsa production_amenity cbsa_percent_households_200k 
global output "yes"

gen l_hhincome = ln(hhincome)

* Share of entrepreneurs who moved from another CBSA
summarize moved_outside_cbsa if entrepreneur==1

*If person moved, those that are entrepreneurs had a significantly larger change in QOL
gen diff = QOL_Production_2000w - QOL_Production_2000_migw
label variable diff "Difference between previous and current CBSA"
egen diffstd = std(diff)

*Test if change in QOL correlates with larger distances for entrepreneurs
* Final labels used in the tables (these overwrite the labels set above for
* diff and QOL_Production_2000wstd)
label variable entrepreneur "Entrepreneur (=1)"
label variable diff "Change in QOL"
label variable QOL_Production_2000wstd "QOL/Productivity"
* Express distance in hundreds of the original unit
replace dists = dists/100
label variable dists "Distance between CBSA ('00 KM)"

save "${Data}/IPUM_QOL_Migration_regressions.dta", replace

********************************************************************************
*********************** Run Regressions ****************************************
********************************************************************************
* Estimates the nine models of Table 8 (three outcomes x three specifications).
* All models are weighted by hhwt, absorb the FEs global and cluster on cbsa.
* Within each outcome the specifications are:
*   1: individual FEs only
*   2: adds industry and occupation FEs, the local_controls global and l_hhincome
*   3: replaces the local controls with cbsa FEs
* After each model, estadd stores Yes/No flags that the table reports as rows.
use "${Data}/IPUM_QOL_Migration_regressions.dta", clear
estimates clear

*Table 8
*Tests on moved indicator
eststo moved1: reghdfe moved_outside_cbsa entrepreneur##c.QOL_Production_2000wstd ///
	[aw=hhwt], absorb($FEs ) cluster(cbsa)
	estadd local indivFE      "Yes"
	estadd local indFE        "No"
	estadd local occupationFE "No"
	estadd local loccontrols  "No"
	estadd local inc          "No"
	estadd local cbsaFE       "No"

eststo moved2: reghdfe moved_outside_cbsa entrepreneur##c.QOL_Production_2000wstd ///
	$local_controls l_hhincome ///
	[aw=hhwt], absorb($FEs  ind2_1950 occ2_1950) cluster(cbsa)
	estadd local indivFE      "Yes"
	estadd local indFE        "Yes"
	estadd local occupationFE "Yes"
	estadd local loccontrols  "Yes"
	estadd local inc          "Yes"
	estadd local cbsaFE       "No"

eststo moved3: reghdfe moved_outside_cbsa entrepreneur##c.QOL_Production_2000wstd ///
	l_hhincome ///
	[aw=hhwt], absorb($FEs ind2_1950 occ2_1950 cbsa) cluster(cbsa)
	estadd local indivFE      "Yes"
	estadd local indFE        "Yes"
	estadd local occupationFE "Yes"
	estadd local loccontrols  "No"
	estadd local inc          "Yes"
	estadd local cbsaFE       "Yes"

*Test on distance moved
eststo dist1: reghdfe dists entrepreneur##c.QOL_Production_2000wstd ///
	entrepreneur##c.QOL_Production_2000_migwstd ///
	[aw=hhwt], absorb($FEs) cluster(cbsa)
	estadd local indivFE      "Yes"
	estadd local indFE        "No"
	estadd local occupationFE "No"
	estadd local loccontrols  "No"
	estadd local inc          "No"
	estadd local cbsaFE       "No"

eststo dist2: reghdfe dists entrepreneur##c.QOL_Production_2000wstd ///
	entrepreneur##c.QOL_Production_2000_migwstd ///
	$local_controls l_hhincome ///
	[aw=hhwt], absorb($FEs ind2_1950 occ2_1950) cluster(cbsa)
	estadd local indivFE      "Yes"
	estadd local indFE        "Yes"
	estadd local occupationFE "Yes"
	estadd local loccontrols  "Yes"
	estadd local inc          "Yes"
	estadd local cbsaFE       "No"

eststo dist3: reghdfe dists entrepreneur##c.QOL_Production_2000wstd ///
	entrepreneur##c.QOL_Production_2000_migwstd ///
	l_hhincome ///
	[aw=hhwt], absorb($FEs ind2_1950 occ2_1950 cbsa ) cluster(cbsa)
	estadd local indivFE      "Yes"
	estadd local indFE        "Yes"
	estadd local occupationFE "Yes"
	estadd local loccontrols  "No"
	estadd local inc          "Yes"
	estadd local cbsaFE       "Yes"

*Test on difference in QOL/Productivity
* These three models use only observations with moved_outside_cbsa==1
eststo diff1: reghdfe diff entrepreneur ///
	[aw=hhwt] if moved_outside_cbsa==1, absorb($FEs ) cluster(cbsa)
	estadd local indivFE      "Yes"
	estadd local indFE        "No"
	estadd local occupationFE "No"
	estadd local loccontrols  "No"
	estadd local inc          "No"
	estadd local cbsaFE       "No"

eststo diff2: reghdfe diff entrepreneur $local_controls l_hhincome ///
	[aw=hhwt] if moved_outside_cbsa==1, absorb($FEs ind2_1950 occ2_1950 ) cluster(cbsa)
	estadd local indivFE      "Yes"
	estadd local indFE        "Yes"
	estadd local occupationFE "Yes"
	estadd local loccontrols  "Yes"
	estadd local inc          "Yes"
	estadd local cbsaFE       "No"

eststo diff3: reghdfe diff entrepreneur l_hhincome ///
	[aw=hhwt] if moved_outside_cbsa==1, absorb($FEs ind2_1950 occ2_1950 cbsa ) cluster(cbsa)
	estadd local indivFE      "Yes"
	estadd local indFE        "Yes"
	estadd local occupationFE "Yes"
	estadd local loccontrols  "No"
	estadd local inc          "Yes"
	estadd local cbsaFE       "Yes"
* Write Table 8 (the nine stored models) to a LaTeX file.
* Columns are grouped in threes by outcome via mgroups(); the stats() rows report
* the Yes/No flags attached with estadd, the observation count and adjusted R2.
* Fixes relative to the earlier version: the printed coefficient label now reads
* "Prior QOL/Productivity" (was misspelled), and the distance column header
* drops a stray closing apostrophe so it matches the label of dists.
* NOTE(review): cells() and stats() are specified, and per the estout
* documentation they appear to take precedence over b(), se() and ar2() - confirm.
esttab moved1 moved2 moved3 dist1 dist2 dist3 diff1 diff2 diff3  using  "${Tables}/IPUMS_distance.tex", booktabs ///
	replace b(%12.3f) ar2(%8.3f) se(%8.2f)  star(* 0.1 ** 0.05 *** 0.01) /// 
	coeflabels(1.entrepreneur "Entrepreneur (=1)" QOL_Production_2000wstd "QOL / Productivity" 1.entrepreneur#c.QOL_Production_2000wstd "Entrepr. x QOL/Prod." QOL_Production_2000_migwstd "Prior QOL/Productivity" 1.entrepreneur#c.QOL_Production_2000_migwstd "Entrepr. x Prior QOL/Prod.")  ///
	eqlabels(none) alignment(c c) mlabels(none) collabels(none) interaction($~\times~$) noconstant ///
	mgroups("Moved CBSA (=1)"  "Distance from prior CBSA ('00 KM)" "Difference in QOL/Productivity", pattern(1 0 0 1 0 0 1 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	cells("b(fmt(3)star)" "se(fmt(3)par)")  /// addnote("t-statistics in parentheses \sym{*} \$p<0.10\$, \sym{**} \$p<0.05\$, \sym{***} \$p<0.01\$")
	stats(indivFE indFE occupationFE loccontrols inc cbsaFE  N r2_a, fmt(0 0 0 0 0 0 0 3) ///
	layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") labels(`"Personal FE"' `"Industry FE"' `"Occupation FE"' `"Geographic Controls"' `"Income Control"' `"CBSA FE"' `"Observations"' `"Adjusted \(R^2\)"')) ///
	keep(1.entrepreneur QOL_Production_2000wstd 1.entrepreneur#c.QOL_Production_2000wstd 1.entrepreneur#c.QOL_Production_2000_migwstd QOL_Production_2000_migwstd entrepreneur ) label
	

***Can also examine ratio of entrepreneurs to non-entrepreneurs within the CBSA, as well as moving (so collapse to CBSA)
/*Do areas with best places have higher ratios of entrepreneurs*/
* The person-level data are preserved here and restored at the end of this
* section, so the collapse below does not affect the code that follows.
preserve
gen total_pop=1
* hhwt-weighted CBSA totals (entrepreneurs, movers, entrepreneur movers, persons)
* and hhwt-weighted CBSA means of the controls and QOL measures
collapse (sum) entrepreneur moved_outside_cbsa moved_outside_cbsa_entrep total_pop (mean) $local_controls BP_CBSA q_2000 q_1990 QOL_diff_2000_1990 QOL_Production_2000wstd [pw=hhwt], by(cbsa)
* Bring in the 1990 CBSA entrepreneur counts. The path matches the location the
* 1990 section of this file saves to (directly under the Data folder); the
* previous path pointed at an Intermediate subfolder that this file never writes.
mmerge cbsa using "${Data}/IMPUS_entrepreneur_counts_cbsa_1990.dta"
drop if _merge==2
* Entrepreneur share among all persons and among movers from another CBSA
* (the mover share is missing for CBSAs with no such movers)
gen entreprenur_to_total=entrepreneur/total_pop
gen entreprenur_to_total_moved=moved_outside_cbsa_entrep/moved_outside_cbsa
sum entreprenur_to_total entreprenur_to_total_moved
* Change in the entrepreneur share relative to the merged 1990 share
gen change_entr_to_total=entreprenur_to_total-entreprenur_to_total_1990

*Test on ratios - Table 9
* collapse replaces variable labels, so re-label the regressors shown in the table.
* The full variable name is spelled out: QOL_Production_2000w itself is not kept
* by the collapse, and the shortened name only resolved through Stata variable
* abbreviation (it fails under set varabbrev off).
label var QOL_Production_2000wstd "QOL/Productivity in 2000"
label var QOL_diff_2000_1990 "Change in QOL from 1990 to 2000"
est clear
* For each outcome: a stored model without and with local controls, followed by
* an unstored controls-only regression that appears in the log but not the table
eststo: reghdfe entreprenur_to_total QOL_Production_2000wstd , cluster(cbsa) noabsorb
	estadd local loccontrols "No"
eststo: reghdfe entreprenur_to_total QOL_Production_2000wstd $local_controls , cluster(cbsa) noabsorb
	estadd local loccontrols "Yes"
		reghdfe entreprenur_to_total $local_controls , cluster(cbsa) noabsorb
		
eststo: reghdfe entreprenur_to_total_moved QOL_Production_2000wstd , cluster(cbsa) noabsorb
	estadd local loccontrols "No"
eststo: reghdfe entreprenur_to_total_moved QOL_Production_2000wstd $local_controls , cluster(cbsa) noabsorb
	estadd local loccontrols "Yes"
	reghdfe entreprenur_to_total_moved $local_controls , cluster(cbsa) noabsorb

eststo: reghdfe change_entr_to_total QOL_diff_2000_1990, cluster(cbsa) noabsorb
	estadd local loccontrols "No"
eststo: reghdfe change_entr_to_total QOL_diff_2000_1990 $local_controls , cluster(cbsa) noabsorb
	estadd local loccontrols "Yes"
	reghdfe change_entr_to_total $local_controls , cluster(cbsa) noabsorb
	
* Forward slash as the path separator (as in the Table 8 export) so the path
* also resolves on macOS and Linux; the file name itself is unchanged
esttab * using "${Tables}/IPUMS_Entrepreneurhsip.tex", booktabs ///
	replace b(%12.3f) ar2(%8.3f) se(%8.3f)  star(* 0.1 ** 0.05 *** 0.01) ///
	mgroups("\% Entrs." "\% Entrs. among movers" "Change in \% Entrs.", pattern(1 0 1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) /// 
	label eqlabels(none) alignment(c c) mlabels(none) collabels(none) interaction($~\times~$) noconstant ///
	cells("b(fmt(3)star)" "se(fmt(3)par)")  /// addnote("t-statistics in parentheses \sym{*} \$p<0.10\$, \sym{**} \$p<0.05\$, \sym{***} \$p<0.01\$")
	stats(loccontrols N r2_a, fmt(0 0 3) ///
	layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") labels(`"Local Controls"' `"Observations"' `"Adjusted \(R^2\)"')) ///
	keep(QOL_Production_2000wstd QOL_diff_2000_1990)

restore


***Decomposition Indices***
*Collapse to single CBSA observations - Since have single observation per cbsa, need to use values for a given times
*use 1995, so 5-years prior to the 2000 QOL measure and at the start of our sample.

* Stubs of the decomposition predictors; each is averaged together with every
* variable whose name starts with the stub
local decomp_stubs l_pop_1900 l_Precipitation l_Greater_90 l_Less_20 l_Sunlight l_Humidity l_Water Topography_Code ///
	Percent_UnHealthy_Air Student_to_Teacher property_crime_rate violent_crime_rate ///
	recreation_estab_pc eating_estab_pc drinking_estab_pc

local decomp_vars
foreach stub of local decomp_stubs {
	local decomp_vars `decomp_vars' `stub'*
}

* hhwt-weighted means by cbsa of the controls, the standardized ratio and the predictors
collapse (mean) $local_controls QOL_Production_2000wstd `decomp_vars' [pw=hhwt], by(cbsa)

*Label Variables
* Only the 1995 versions are labelled, since those are the ones shown in the table
label variable l_pop_1900_1995            "Log(Population in 1900)"
label variable l_Precipitation_1995       "Log(Precipitation)"
label variable l_Greater_90_1995          "Log(Number of Days > 90 F)"
label variable l_Less_20_1995             "Log(Number of Days <20 F)"
label variable l_Sunlight_1995            "Log(Average January Sun)"
label variable l_Humidity_1995            "Log(Average July Humidity)"
label variable l_Water_1995               "Log(\% of area covered by water)"
label variable Topography_Code_1995       "Topographical Variation"
label variable Percent_UnHealthy_Air_1995 "Percent of Days with Unhealthy Air"
label variable Student_to_Teacher_1995    "Student-Teacher Ratio"
label variable property_crime_rate_1995   "Property Crimes per 100K"
label variable violent_crime_rate_1995    "Violent Crimes per 100K"
label variable recreation_estab_pc_1995   "Recreation Establishments per 1,000"
label variable eating_estab_pc_1995       "Eating Establishments per 1,000"
label variable drinking_estab_pc_1995     "Drinking Establishments per 1,000"
		
*Table D.10
* Regress the standardized QOL/Productivity ratio on the local controls plus
* (1) the predictors grouped as non-time-varying, (2) those grouped as
* time-varying, and (3) both groups together, all using the 1995 values.
* Data are one row per cbsa at this point; standard errors are clustered on cbsa.
est clear
eststo: reghdfe  QOL_Production_2000wstd $local_controls  ///
	l_pop_1900_1995  l_Precipitation_1995  l_Greater_90_1995  l_Less_20_1995 l_Sunlight_1995 l_Humidity_1995 l_Water_1995 Topography_Code_1995 , cluster(cbsa) noabsorb
	estadd local loccontrols "Yes"

eststo: reghdfe  QOL_Production_2000wstd $local_controls  /// 
	Percent_UnHealthy_Air_1995 Student_to_Teacher_1995 property_crime_rate_1995  violent_crime_rate_1995  recreation_estab_pc_1995  eating_estab_pc_1995  drinking_estab_pc_1995, cluster(cbsa) noabsorb
	estadd local loccontrols "Yes"

eststo: reghdfe  QOL_Production_2000wstd $local_controls  ///
	l_pop_1900_1995  l_Precipitation_1995  l_Greater_90_1995  l_Less_20_1995 l_Sunlight_1995 l_Humidity_1995 l_Water_1995 Topography_Code_1995  ///
	Percent_UnHealthy_Air_1995 Student_to_Teacher_1995 property_crime_rate_1995  violent_crime_rate_1995  recreation_estab_pc_1995  eating_estab_pc_1995  drinking_estab_pc_1995, cluster(cbsa) noabsorb
	estadd local loccontrols "Yes"

* Export the three models. Forward slash as the path separator (as in the
* Table 8 export) so the path also resolves on macOS and Linux.
esttab * using "${Tables}/IPUMS_Decomp.tex", booktabs ///
	replace b(%12.3f) ar2(%8.3f) se(%8.3f)  star(* 0.1 ** 0.05 *** 0.01) ///
	mgroups("Non-Time Varying" "Time Varying" "All", pattern(1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) /// 
	label eqlabels(none) alignment(c c) mlabels(none) collabels(none) interaction($~\times~$) noconstant ///
	cells("b(fmt(3)star)" "se(fmt(3)par)")  /// addnote("t-statistics in parentheses \sym{*} \$p<0.10\$, \sym{**} \$p<0.05\$, \sym{***} \$p<0.01\$")
	stats(loccontrols N r2_a, fmt(0 0 3) ///
	layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") labels(`"Local Controls"' `"Observations"' `"Adjusted \(R^2\)"')) ///
	keep(l_pop_1900_1995  l_Precipitation_1995  l_Greater_90_1995  l_Less_20_1995 l_Sunlight_1995 l_Humidity_1995 l_Water_1995 Topography_Code_1995  Percent_UnHealthy_Air_1995 Student_to_Teacher_1995 property_crime_rate_1995  violent_crime_rate_1995  recreation_estab_pc_1995  eating_estab_pc_1995  drinking_estab_pc_1995)
